Setup

Packages Used

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.1.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
if (!require("ggbeeswarm")) install.packages("ggbeeswarm")
## Loading required package: ggbeeswarm
library(ggbeeswarm)
if (!require("plotly")) install.packages("plotly")
## Loading required package: plotly
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(plotly)

Datasets Used

# Load the billionaires snapshot (CSV path is relative to the working
# directory); readr's column-type summary is printed below.
bills <- read_csv("../dataraw/billionaires_2021_10_31.csv")
## Rows: 500 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Name, Total_Net_Worth, LastChange, YTDChange, Country, Industry
## dbl (4): Rank, Total_Net_Worth_Bil, LastChange_Bil, YTDChange_Bil
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Quick structural check: one line per column with its type and first values.
glimpse(bills)
## Rows: 500
## Columns: 10
## $ Rank                <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,…
## $ Name                <chr> "Elon Musk", "Jeff Bezos", "Bernard Arnault", "Bil…
## $ Total_Net_Worth     <chr> "$311B", "$195B", "$167B", "$136B", "$131B", "$126…
## $ Total_Net_Worth_Bil <dbl> 311.0, 195.0, 167.0, 136.0, 131.0, 126.0, 121.0, 1…
## $ LastChange          <chr> "+$9.32B", "-$3.79B", "-$544M", "+$906M", "+$1.71B…
## $ LastChange_Bil      <dbl> 9.3200, -3.7900, -0.5440, 0.9060, 1.7100, 1.6400, …
## $ YTDChange           <chr> "+$141B", "+$5.06B", "+$52.7B", "+$4.40B", "+$48.7…
## $ YTDChange_Bil       <dbl> 141.00, 5.06, 52.70, 4.40, 48.70, 46.70, 17.30, 37…
## $ Country             <chr> "UnitedStates", "UnitedStates", "France", "UnitedS…
## $ Industry            <chr> "Technology", "Technology", "Consumer", "Technolog…
# Confirm that read_csv() handed back a tibble.
is_tibble(bills)
## [1] TRUE
# bills[bills == "$121B"] <- NA  # Inject NAs to confirm the is.na() scan below finds them.
#   ###
#   ###  Re-run read_csv to refresh the data frame after testing
#   ###
#
# for (i in 1:ncol(bills)) {
#   print(which(is.na(bills[ ,i])))
# }


# ggplot(bills, aes(LastChange_Bil, Total_Net_Worth_Bil, color = Industry)) +
#     geom_point() +
#     scale_x_continuous(trans = "log2") +
#     scale_y_continuous(trans = "log2") 

# Total net worth (Total_Net_Worth_Bil) summed within each original
# Industry label; one row per industry, returned ungrouped.
bills_ex <- summarise(
  group_by(bills, Industry),
  Sum = sum(Total_Net_Worth_Bil),
  .groups = "drop"
)

# Reduced copy of `bills`:
#   * Industry        - kept as-is for Technology, Industrial, Finance,
#                       Diversified and Consumer; every other label becomes
#                       "Other". A missing Industry stays NA.
#   * LastChange_Prop - (LastChange_Bil + Total_Net_Worth_Bil) divided by the
#                       row's own Total_Net_Worth_Bil.
#   * LastChange_Coef - (LastChange_Bil + mean net worth) divided by the mean
#                       of Total_Net_Worth_Bil over all rows.
bills_red <- bills %>%
  mutate(
    # Use the masked column rather than `bills$Industry`, so the test is
    # always evaluated on the rows actually flowing through the pipe.
    Industry = if_else(
      Industry %in% c(
        "Technology",
        "Industrial",
        "Finance",
        "Diversified",
        "Consumer"
      ) | is.na(Industry), # `%in%` never yields NA; keep NA labels untouched
      Industry,
      "Other"
    ),
    LastChange_Prop = (LastChange_Bil + Total_Net_Worth_Bil) / Total_Net_Worth_Bil,
    LastChange_Coef = (LastChange_Bil + mean(Total_Net_Worth_Bil)) / mean(Total_Net_Worth_Bil)
  )

# Total net worth (Total_Net_Worth_Bil) summed within each Industry label of
# bills_red; one row per industry, returned ungrouped.
bills_rex <- summarise(
  group_by(bills_red, Industry),
  Sum = sum(Total_Net_Worth_Bil),
  .groups = "drop"
)

# Reusable jitter position for point layers. The fixed seed makes the random
# offsets reproducible from one layer (and one render) to the next.
jitter <- position_jitter(seed = 2, width = 0.2, height = NULL)
# ggplot(bills_red, aes(x = Industry, fill = Industry)) +
#     geom_bar()
# 
# ggplot(bills, aes(x = Industry, fill = Industry)) +
#     geom_bar() +
#     coord_flip()
# 
# ggplot(bills_ex, aes(Industry, Sum, fill = Industry)) +
#     stat_summary(geom="bar", position = "stack") +
#     xlab("Sector") +
#     coord_flip()
# 
# ggplot(bills_rex, aes(Industry, Sum, fill = Industry)) +
#     stat_summary(geom="bar", position = "stack") +
#     xlab("Sector")
# 
# 
# 
# glimpse(strmtv_long)
# plot(bills, aes(LastChange_Bil, Total_Net_Worth_Bil, color = Country)) +
#     geom_point() +
#     scale_x_continuous(n.breaks = 10) +
#     scale_y_continuous(n.breaks = 10) 

# ggplot(bills_red,
#        aes(
#          x = 1,
#          y = LastChange_Prop,
#          color = Industry,
#          size = Total_Net_Worth_Bil
#        )) +
#   geom_point(position = jitter, alpha = 0.8) +
#   scale_size(range = c(0, 20))
# 
# ggplot(bills_red,
#        aes(
#          x = 1,
#          y = LastChange_Coef,
#          color = Industry,
#          size = Total_Net_Worth_Bil
#        )) +
#   geom_point(position = jitter) +
#   scale_size(range = c(0, 20))
# Bubble strip of bills_red: every row is one point jittered around x = 1,
# placed vertically by YTDChange_Bil, coloured by Industry and sized by
# Total_Net_Worth_Bil.
p <- ggplot(
  data = bills_red,
  mapping = aes(
    x = 1,
    y = YTDChange_Bil,
    colour = Industry,
    size = Total_Net_Worth_Bil,
    text = Name # carried along for the plotly tooltip requested below
  )
) +
  # Filled bubbles.
  geom_point(position = jitter) +
  # Faint gray open circles (shape 1) on top, using the same seeded position
  # object as the filled layer.
  geom_point(position = jitter, shape = 1, colour = "gray50", alpha = 0.2) +
  scale_y_continuous(n.breaks = 11) +
  scale_size(range = c(0, 20))

# Convert to an interactive widget; the tooltip is limited to the `text` and
# `size` aesthetics.
ggplotly(p, tooltip = c("text", "size"))
# Record the R version, platform and attached/loaded package versions.
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-redhat-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/R/lib/libRblas.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] plotly_4.10.0    ggbeeswarm_0.6.0 forcats_0.5.1    stringr_1.4.0   
##  [5] dplyr_1.0.7      purrr_0.3.4      readr_2.1.0      tidyr_1.1.3     
##  [9] tibble_3.1.6     ggplot2_3.3.5    tidyverse_1.3.1 
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.7        lubridate_1.8.0   assertthat_0.2.1  digest_0.6.28    
##  [5] utf8_1.2.2        R6_2.5.1          cellranger_1.1.0  backports_1.4.0  
##  [9] reprex_2.0.1      evaluate_0.14     httr_1.4.2        pillar_1.6.4     
## [13] rlang_0.4.12      lazyeval_0.2.2    readxl_1.3.1      rstudioapi_0.13  
## [17] data.table_1.14.2 jquerylib_0.1.4   rmarkdown_2.11    labeling_0.4.2   
## [21] htmlwidgets_1.5.4 bit_4.0.4         munsell_0.5.0     broom_0.7.10     
## [25] compiler_3.6.0    vipor_0.4.5       modelr_0.1.8      xfun_0.28        
## [29] pkgconfig_2.0.3   htmltools_0.5.2   tidyselect_1.1.1  viridisLite_0.4.0
## [33] fansi_0.5.0       crayon_1.4.2      tzdb_0.2.0        dbplyr_2.1.1     
## [37] withr_2.4.2       grid_3.6.0        jsonlite_1.7.2    gtable_0.3.0     
## [41] lifecycle_1.0.1   DBI_1.1.1         magrittr_2.0.1    scales_1.1.1     
## [45] vroom_1.5.6       cli_3.1.0         stringi_1.7.5     farver_2.1.0     
## [49] fs_1.5.0          xml2_1.3.2        bslib_0.3.1       ellipsis_0.3.2   
## [53] generics_0.1.1    vctrs_0.3.8       tools_3.6.0       bit64_4.0.5      
## [57] glue_1.5.0        beeswarm_0.4.0    crosstalk_1.2.0   hms_1.1.1        
## [61] parallel_3.6.0    fastmap_1.1.0     yaml_2.2.1        colorspace_2.0-2 
## [65] rvest_1.0.2       knitr_1.33        haven_2.4.3       sass_0.4.0